Load the package

require(cytofkit)  ## version >= 1.4.6
## Loading required package: cytofkit
## Loading required package: ggplot2
## Loading required package: plyr
require(ggplot2)
require(reshape2)
## Loading required package: reshape2
require(plotly)
## Loading required package: plotly
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:graphics':
## 
##     layout
setwd("/Users/chenhao/GitProject/cytofkit_data_codes")  ## change this to your path

Parameter setting and Data loading

dimReductionMethod <- "tsne"
visualizationMethods <- "tsne"
clusterMethods <- c("Rphenograph", "ClusterX", "DensVM")
markers <- as.character(read.table("parameter.txt", header = TRUE)[ ,1])

xdata <- cytof_exprsExtract(fcsFile = "./CD14negCD19negPBMC_dataset/130515_C2_stim_CD19-.fcs", 
                           transformMethod = "autoLgcl",
                           markers = markers)

Dimensionality reduction

ydata <- cytof_dimReduction(data = xdata, method = "tsne")

Cluster anlsysis using method Rphenograph, ClusterX, DensVM

cluster_res <- lapply(clusterMethods, cytof_cluster, 
                      ydata = ydata,
                      xdata = xdata)
names(cluster_res) <- clusterMethods

Rename the clusters according to positions on t-SNE map for comparation.

tsne_cluster <- as.data.frame(ydata)
tsne_cluster$ClusterX <- as.integer(cluster_res[["ClusterX"]])
tsne_cluster$DensVM <- as.integer(cluster_res[["DensVM"]])
tsne_cluster$PhenoGraph <- as.integer(cluster_res[["Rphenograph"]])

ClusterX_center <- aggregate(cbind(tsne_1, tsne_2) ~ ClusterX, data = tsne_cluster, median)
ClusterX_center$ClusterX_label <- rank(-ClusterX_center$tsne_2)
tsne_cluster$ClusterX <- ClusterX_center$ClusterX_label[match(tsne_cluster$ClusterX, ClusterX_center$ClusterX)]

DensVM_center <- aggregate(cbind(tsne_1, tsne_2) ~ DensVM, data = tsne_cluster, median)
DensVM_center$DensVM_label <- rank(-DensVM_center$tsne_2)
tsne_cluster$DensVM <- DensVM_center$DensVM_label[match(tsne_cluster$DensVM, DensVM_center$DensVM)]

PhenoGraph_center <- aggregate(cbind(tsne_1, tsne_2) ~ PhenoGraph, data = tsne_cluster, median)
PhenoGraph_center$PhenoGraph_label <- rank(-PhenoGraph_center$tsne_2)
tsne_cluster$PhenoGraph <- PhenoGraph_center$PhenoGraph_label[match(tsne_cluster$PhenoGraph, PhenoGraph_center$PhenoGraph)]

data_tsne_cluster <- cbind(xdata, tsne_cluster)
## Sice t-SNE will output different embedding map every time, to reproduce the resutls in 
## cytofkit paper, we loaded the saved t-SNE and  cluster results from our run.
data_tsne_cluster <- read.csv("./CD14negCD19negPBMC_dataset/130515_C2_stim_CD19-_exprs_tsne_cluster.csv", header = TRUE, row.names = 1, check.names = FALSE)

Comparison of clustering methods for subset detection

PhenoGraph cluster plot

cytof_clusterPlot(data_tsne_cluster, xlab = "tsne_1", ylab = "tsne_2", cluster = "PhenoGraph", sampleLabel = FALSE)

ClusterX cluster plot

cytof_clusterPlot(data_tsne_cluster, xlab = "tsne_1", ylab = "tsne_2", cluster = "ClusterX", sampleLabel = FALSE)

DensVM cluster plot

cytof_clusterPlot(data_tsne_cluster, xlab = "tsne_1", ylab = "tsne_2", cluster = "DensVM", sampleLabel = FALSE)

Cluster heatmap plot

PhenoGraph cluster median heatmap

cluster_median <- aggregate(. ~ PhenoGraph, data = data_tsne_cluster[,c(1:36,39)], median)
rownames(cluster_median) <- cluster_median$PhenoGraph
cluster_median <- subset(cluster_median, select = -PhenoGraph)
m <- regexpr("\\<.*\\>", colnames(cluster_median), perl = TRUE)
colnames(cluster_median) <- gsub("<|>", "", regmatches(colnames(cluster_median), m))
cytof_heatmap(cluster_median, baseName = "Cluster Median", cex_row_label = 1, cex_col_label = 0.8)

ClusterX cluster median heatmap

cluster_median <- aggregate(. ~ ClusterX, data = data_tsne_cluster[,c(1:36,40)], median)
rownames(cluster_median) <- cluster_median$ClusterX
cluster_median <- subset(cluster_median, select = -ClusterX)
m <- regexpr("\\<.*\\>", colnames(cluster_median), perl = TRUE)
colnames(cluster_median) <- gsub("<|>", "", regmatches(colnames(cluster_median), m))
cytof_heatmap(cluster_median, baseName = "Cluster Median", cex_row_label = 1, cex_col_label = 0.8)

DensVM cluster median heatmap

cluster_median <- aggregate(. ~ DensVM, data = data_tsne_cluster[,c(1:36,41)], median)
rownames(cluster_median) <- cluster_median$DensVM
cluster_median <- subset(cluster_median, select = -DensVM)
m <- regexpr("\\<.*\\>", colnames(cluster_median), perl = TRUE)
colnames(cluster_median) <- gsub("<|>", "", regmatches(colnames(cluster_median), m))
cytof_heatmap(cluster_median, baseName = "Cluster Median", cex_row_label = 1, cex_col_label = 0.8)

Assess ISOMAP, diffusion map and t-SNE for inferring inter-cluster relationship

## Loading subsample data
subData1 <- read.csv("./CD14negCD19negPBMC_dataset/130515_C2_stim_CD19-_subsample1_tsne.csv", header = TRUE, row.names = 1, check.names = FALSE)
subData2 <- read.csv("./CD14negCD19negPBMC_dataset/130515_C2_stim_CD19-_subsample2_tsne.csv", header = TRUE, row.names = 1, check.names = FALSE)
subData3 <- read.csv("./CD14negCD19negPBMC_dataset/130515_C2_stim_CD19-_subsample3_tsne.csv", header = TRUE, row.names = 1, check.names = FALSE)

t-SNE plot

cytof_clusterPlot(subData1, xlab = "tsne_1", ylab = "tsne_2", 
                  cluster = "cluster", sampleLabel = FALSE, 
                  labelSize = 8, fixCoord = FALSE)

cytof_clusterPlot(subData2, xlab = "tsne_1", ylab = "tsne_2", 
                  cluster = "cluster", sampleLabel = FALSE, 
                  labelSize = 8, fixCoord = FALSE)

cytof_clusterPlot(subData3, xlab = "tsne_1", ylab = "tsne_2", 
                  cluster = "cluster", sampleLabel = FALSE, 
                  labelSize = 8, fixCoord = FALSE)

ISOMAP plot

## Loading subsample data
subData1 <- read.csv("./CD14negCD19negPBMC_dataset/130515_C2_stim_CD19-_subsample1_isomap.csv", header = TRUE, row.names = 1, check.names = FALSE)
subData2 <- read.csv("./CD14negCD19negPBMC_dataset/130515_C2_stim_CD19-_subsample2_isomap.csv", header = TRUE, row.names = 1, check.names = FALSE)
subData3 <- read.csv("./CD14negCD19negPBMC_dataset/130515_C2_stim_CD19-_subsample3_isomap.csv", header = TRUE, row.names = 1, check.names = FALSE)
cytof_clusterPlot(subData1, xlab = "isomap_1", ylab = "isomap_2", 
                  cluster = "cluster", sampleLabel = FALSE, 
                  labelSize = 8, fixCoord = FALSE)

cytof_clusterPlot(subData2, xlab = "isomap_1", ylab = "isomap_2", 
                  cluster = "cluster", sampleLabel = FALSE, 
                  labelSize = 8, fixCoord = FALSE)

cytof_clusterPlot(subData3, xlab = "isomap_1", ylab = "isomap_2", 
                  cluster = "cluster", sampleLabel = FALSE, 
                  labelSize = 8, fixCoord = FALSE)

Diffusion map plot

## diffusion map for subsample 1
dfmap1 <- cytof_progression(data = subData1[ ,1:36], 
                            method = "diffusionmap",
                            out_dim = 4,
                            cluster = subData1[,37],
                            clusterSampleMethod = "all")
##   Runing Diffusion Map...  DONE
## diffusion map for subsample 2
dfmap2 <- cytof_progression(data = subData2[ ,1:36], 
                            method = "diffusionmap",
                            out_dim = 4,
                            cluster = subData2[,37],
                            clusterSampleMethod = "all")
##   Runing Diffusion Map...  DONE
## diffusion map for subsample 3
dfmap3 <- cytof_progression(data = subData3[ ,1:36], 
                            method = "diffusionmap",
                            out_dim = 4,
                            cluster = subData3[,37],
                            clusterSampleMethod = "all")
##   Runing Diffusion Map...  DONE
dfmap1_progression_data <- as.data.frame(do.call(cbind, dfmap1))
cytof_clusterPlot(dfmap1_progression_data, xlab = "diffusionmap_1", ylab = "diffusionmap_2", 
                  cluster = "sampleCluster", sampleLabel = FALSE, 
                  labelSize = 8, fixCoord = FALSE)

dfmap2_progression_data <- as.data.frame(do.call(cbind, dfmap2))
cytof_clusterPlot(dfmap2_progression_data, xlab = "diffusionmap_1", ylab = "diffusionmap_2", 
                  cluster = "sampleCluster", sampleLabel = FALSE, 
                  labelSize = 8, fixCoord = FALSE)

dfmap3_progression_data <- as.data.frame(do.call(cbind, dfmap3))
cytof_clusterPlot(dfmap3_progression_data, xlab = "diffusionmap_1", ylab = "diffusionmap_2", 
                  cluster = "sampleCluster", sampleLabel = FALSE, 
                  labelSize = 8, fixCoord = FALSE)

Progression analysis

## load some functions
isomap_progression <- cytof_progression(data = data_tsne_cluster[ ,1:36], 
                                        cluster = data_tsne_cluster$ClusterX,
                                        method = "isomap",
                                        out_dim = 4,
                                        clusterSampleMethod = "ceil",
                                        clusterSampleSize = 500)
isomap_progression_data <- as.data.frame(do.call(cbind, isomap_progression))
write.csv(isomap_progression_data, "subset_downsample_500_isomap_progression.csv")

dfmap_progression <- cytof_progression(data = data_tsne_cluster[ ,1:36], 
                                        method = "diffusionmap",
                                        out_dim = 4,
                                        cluster = data_tsne_cluster$ClusterX,
                                        clusterSampleMethod = "ceil",
                                        clusterSampleSize = 500)

dfmap_progression_data <- as.data.frame(do.call(cbind, dfmap_progression))
write.csv(dfmap_progression_data, "subset_downsample_500_diffusionmap_progression.csv")

Progression plot

#plot_ly(isomap_progression_data, x = isomap_1, y = isomap_2, z = isomap_3, group = sampleCluster, size = 0.5, type = "scatter3d", mode = "markers")

cytof_clusterPlot(isomap_progression_data, xlab = "isomap_1", ylab = "isomap_2", 
                  cluster = "sampleCluster", sampleLabel = FALSE, 
                  labelSize = 15, fixCoord = FALSE)

#plot_ly(dfmap_progression_data, x = diffusionmap_1, y = diffusionmap_2, z = diffusionmap_3, group = sampleCluster, type = "scatter3d", mode = "markers")

cytof_clusterPlot(dfmap_progression_data, xlab = "diffusionmap_1", ylab = "diffusionmap_2", 
                  cluster = "sampleCluster", sampleLabel = FALSE, 
                  labelSize = 15, fixCoord = FALSE)

visMarkers <- c("(Sm150)Di<GranzymeB>", "(Yb173)Di<Perforin>")

visuaPlot(isomap_progression_data, xlab = "isomap_1", ylab="isomap_2", 
          zlab = visMarkers[1], pointSize = 1.5)

visuaPlot(isomap_progression_data, xlab = "isomap_1", ylab="isomap_2", 
          zlab = visMarkers[2], pointSize = 1.5)

visuaPlot(dfmap_progression_data, xlab = "diffusionmap_1", ylab="diffusionmap_2", 
          zlab = visMarkers[1], pointSize = 1.5)

visuaPlot(dfmap_progression_data, xlab = "diffusionmap_1", ylab="diffusionmap_2", 
          zlab = visMarkers[2], pointSize = 1.5)

cytof_progressionPlot(data=isomap_progression_data, 
                      markers = visMarkers, 
                      clusters = c(11, 14, 12, 15, 13), 
                      orderCol = "isomap_2",
                      clusterCol = "sampleCluster",
                      clusterLabelSize = 10,
                      segmentSize = 1,
                      reverseOrder = TRUE,
                      addClusterLabel = TRUE)

cytof_progressionPlot(data=dfmap_progression_data, 
                      markers = visMarkers, 
                      clusters = c(11, 14, 12, 15, 13), 
                      orderCol = "diffusionmap_2",
                      clusterCol = "sampleCluster",
                      clusterLabelSize = 10,
                      segmentSize = 1,
                      reverseOrder = TRUE,
                      addClusterLabel = TRUE)

Session Information

sessionInfo()
## R version 3.3.0 (2016-05-03)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.5 (El Capitan)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] plotly_3.6.5   reshape2_1.4.1 cytofkit_1.5.4 plyr_1.8.4    
## [5] ggplot2_2.1.0 
## 
## loaded via a namespace (and not attached):
##   [1] nlme_3.1-128                tsne_0.1-2                 
##   [3] bitops_1.0-6                matrixStats_0.50.2         
##   [5] destiny_1.2.1               pbkrtest_0.4-6             
##   [7] httr_1.1.0                  doParallel_1.0.10          
##   [9] RColorBrewer_1.1-2          tools_3.3.0                
##  [11] vegan_2.3-5                 R6_2.1.2                   
##  [13] KernSmooth_2.23-15          rpart_4.1-10               
##  [15] Hmisc_3.17-4                BiocGenerics_0.18.0        
##  [17] mgcv_1.8-12                 colorspace_1.2-6           
##  [19] permute_0.9-0               nnet_7.3-12                
##  [21] sp_1.2-3                    gridExtra_2.2.1            
##  [23] chron_2.3-47                graph_1.50.0               
##  [25] quantreg_5.26               Biobase_2.32.0             
##  [27] formatR_1.4                 SparseM_1.7                
##  [29] labeling_0.3                caTools_1.17.1             
##  [31] scales_0.4.0                DEoptimR_1.0-4             
##  [33] lmtest_0.9-34               mvtnorm_1.0-5              
##  [35] robustbase_0.92-6           proxy_0.4-15               
##  [37] stringr_1.0.0               digest_0.6.9               
##  [39] foreign_0.8-66              minqa_1.2.4                
##  [41] rmarkdown_0.9.6             base64enc_0.1-3            
##  [43] rrcov_1.3-11                htmltools_0.3.5            
##  [45] lme4_1.1-12                 htmlwidgets_0.6            
##  [47] pdist_1.2                   flowCore_1.38.2            
##  [49] VGAM_1.0-2                  FNN_1.1                    
##  [51] shiny_0.13.2                jsonlite_0.9.21            
##  [53] zoo_1.7-13                  gtools_3.5.0               
##  [55] acepack_1.3-3.3             car_2.1-2                  
##  [57] magrittr_1.5                Formula_1.2-1              
##  [59] Matrix_1.2-6                Rcpp_0.12.5                
##  [61] munsell_0.4.3               viridis_0.3.4              
##  [63] scatterplot3d_0.3-37        stringi_1.1.1              
##  [65] yaml_2.1.13                 MASS_7.3-45                
##  [67] gplots_3.0.1                Rtsne_0.10                 
##  [69] grid_3.3.0                  parallel_3.3.0             
##  [71] gdata_2.17.0                ggrepel_0.5                
##  [73] lattice_0.20-33             splines_3.3.0              
##  [75] knitr_1.13                  tcltk_3.3.0                
##  [77] igraph_1.0.1                RUnit_0.4.31               
##  [79] corpcor_1.6.8               codetools_0.2-14           
##  [81] stats4_3.3.0                XML_3.98-1.4               
##  [83] evaluate_0.9                latticeExtra_0.6-28        
##  [85] data.table_1.9.6            vcd_1.4-1                  
##  [87] nloptr_1.0.4                httpuv_1.3.3               
##  [89] foreach_1.4.3               MatrixModels_0.4-1         
##  [91] VIM_4.4.1                   tidyr_0.5.0                
##  [93] gtable_0.2.0                RANN_2.5                   
##  [95] assertthat_0.1              mime_0.4                   
##  [97] xtable_1.8-2                RcppEigen_0.3.2.8.1        
##  [99] ConsensusClusterPlus_1.36.0 e1071_1.6-7                
## [101] flowUtils_1.36.0            class_7.3-14               
## [103] survival_2.39-4             pcaPP_1.9-60               
## [105] tibble_1.0                  iterators_1.0.8            
## [107] FlowSOM_1.4.0               cluster_2.0.4